********************************************************************************
* Data preparation: Income variables (some of which will count as transfer)
********************************************************************************

include choose_version.do
	 
* Compute all income variables at the household level
* For topcoding procedures and values in the CPS, see https://cps.ipums.org/cps/topcodes_tables.shtml

* Income variables

* hhincome (total household income): sum of all household members' individual incomes (inctot) during previous year; based on who lives together in survey year
* variable is at the household level and the same within household
* sentinel 99999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace hhincome = . if hhincome == 99999999

* inctot: total personal income; pre-tax for previous calendar year
* individual level
* sentinel 999999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace inctot = . if inctot == 999999999
* Store the household sum as double: egen's default float storage cannot hold
* integers above 16,777,216 exactly, so a large household total would be rounded
* and the exact-equality check below could fail for reasons unrelated to the data.
* With the missing option the sum is missing only when every member's inctot is missing.
bysort serial: egen double inctot_hh = total(inctot), missing
label var inctot_hh "total household income, aggregated from individual"
* Consistency check: the aggregated individual incomes must reproduce hhincome
assert inctot_hh == hhincome

* incwage: total pre-tax wage and salary income for the previous year
* individual level
* sentinel 99999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incwage = . if incwage == 99999999
* Person-level flag: 1 at or above 9999999, 0 below, missing when incwage is missing
gen incwage_tc = (incwage >= 9999999) if incwage != .
label variable incwage_tc "1 if wage and salary income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incwage_hh = total(incwage), missing
label variable incwage_hh "total household wage and salary income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incwage_hh_tc = max(incwage_tc)
label variable incwage_hh_tc "1 if one wage and salary observation in HH exceeds TC threshold"

* incbus: total pre-income-tax non-farm business and/or professional practice income for the previous calendar year
* individual level
* note: separate components of this are available
* sentinel 99999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incbus = . if incbus == 99999999
* Household sum; missing only when every member's value is missing
by serial, sort: egen incbus_hh = total(incbus), missing
label variable incbus_hh "total household non-farm business income, aggregated from individual"

* incfarm: total pre-income-tax earnings as a tenant farmer, sharecropper, or operator of his or her own farm during the previous calendar year
* individual level
* note: separate components of this are available
* sentinel 99999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incfarm = . if incfarm == 99999999
* Household sum; missing only when every member's value is missing
by serial, sort: egen incfarm_hh = total(incfarm), missing
label variable incfarm_hh "total household farm income, aggregated from individual"

* incss: total pre-tax income (if any) the respondent received from Social Security
* individual level
* sentinel 999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incss = . if incss == 999999
* Person-level flag: 1 at or above 50000, 0 below, missing when incss is missing
gen incss_tc = (incss >= 50000) if incss != .
label variable incss_tc "1 if social security income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incss_hh = total(incss), missing
label variable incss_hh "total household social security income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incss_hh_tc = max(incss_tc)
label variable incss_hh_tc "1 if one SS income observation in HH exceeds TC threshold"

* incwelfr: total pre-tax income the respondent received during the previous calendar year from various public assistance programs commonly referred to as "welfare"
* individual level
* sentinel 999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incwelfr = . if incwelfr == 999999
* Person-level flag: 1 at or above 25000, 0 below, missing when incwelfr is missing
gen incwelfr_tc = (incwelfr >= 25000) if incwelfr != .
label variable incwelfr_tc "1 if welfare income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incwelfr_hh = total(incwelfr), missing
label variable incwelfr_hh "total household welfare income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incwelfr_hh_tc = max(incwelfr_tc)
label variable incwelfr_hh_tc "1 if one welfare income observation in HH exceeds TC threshold"

* incretir: pre-tax income (if any) the respondent received from all retirement income sources during the past year
* individual level
* note: changed definition in 2019
* note: there are variables increti1 and increti2 which are topcoded; but: "Researchers interested in the total amount of retirement income received, rather than the amount received from specific sources, should use INCRETIR. Because INCRETIR reports the value of retirement income from all sources, the amount reported in that variable may be greater than the sum of INCRETI1 and INCRETI2."
* sentinel 99999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incretir = . if incretir == 99999999
* Household sum; missing only when every member's value is missing
by serial, sort: egen incretir_hh = total(incretir), missing
label variable incretir_hh "total household retirement income, aggregated from individual"

* incssi: total pre-tax income (if any) the respondent received from Supplemental Security Income (SSI) during the previous calendar year
* individual level
* sentinel 999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incssi = . if incssi == 999999
* Person-level flag: 1 at or above 25000, 0 below, missing when incssi is missing
gen incssi_tc = (incssi >= 25000) if incssi != .
label variable incssi_tc "1 if SSI income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incssi_hh = total(incssi), missing
label variable incssi_hh "total household SSI income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incssi_hh_tc = max(incssi_tc)
label variable incssi_hh_tc "1 if one SSI income observation in HH exceeds TC threshold"

* incint: total pre-tax income (if any) the respondent received from interest on saving accounts, certificates of deposit, money market funds, bonds, treasury notes, IRAs, and/or other investments which paid interest
* individual level
* sentinel 9999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incint = . if incint == 9999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incint is missing
gen incint_tc = (incint >= 99999) if incint != .
label variable incint_tc "1 if interest income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incint_hh = total(incint), missing
label variable incint_hh "total household interest income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incint_hh_tc = max(incint_tc)
label variable incint_hh_tc "1 if one interest income observation in HH exceeds TC threshold"

* incunemp: total pre-tax income (if any) the respondent received from state or federal unemployment compensation, Supplemental Unemployment Benefits (SUB), or union unemployment or strike benefits during the previous calendar year
* individual level
* sentinel 999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incunemp = . if incunemp == 999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incunemp is missing
gen incunemp_tc = (incunemp >= 99999) if incunemp != .
label variable incunemp_tc "1 if UI income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incunemp_hh = total(incunemp), missing
label variable incunemp_hh "total household UI income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incunemp_hh_tc = max(incunemp_tc)
label variable incunemp_hh_tc "1 if one UI income observation in HH exceeds TC threshold"

* incwkcom: total pre-tax income (if any) the respondent received from worker's compensation payments or other payments as a result of a job-related injury or illness
* individual level
* sentinel 999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incwkcom = . if incwkcom == 999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incwkcom is missing
gen incwkcom_tc = (incwkcom >= 99999) if incwkcom != .
label variable incwkcom_tc "1 if worker's compensation income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incwkcom_hh = total(incwkcom), missing
label variable incwkcom_hh "total household worker's compensation income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incwkcom_hh_tc = max(incwkcom_tc)
label variable incwkcom_hh_tc "1 if one worker's compensation income observation in HH exceeds TC threshold"

* incvet: pre-tax income (if any) the respondent received from payments from the Veterans' Administration (VA) during the previous calendar year
* individual level
* sentinel 9999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incvet = . if incvet == 9999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incvet is missing
gen incvet_tc = (incvet >= 99999) if incvet != .
label variable incvet_tc "1 if veteran's benefit income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incvet_hh = total(incvet), missing
label variable incvet_hh "total household veteran's benefit income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incvet_hh_tc = max(incvet_tc)
label variable incvet_hh_tc "1 if one veteran's benefit income observation in HH exceeds TC threshold"

* incsurv: pre-tax income (if any) the respondent received from survivors' benefits during the previous calendar year
* individual level
* note: there are variables incsurv1 and incsurv2 which are topcoded; but: "Researchers interested in the total amount of income received from survivor benefits, rather than the amount received from specific sources, should use INCSURV. Because INCSURV reports the value of survivor benefits from all sources, the amount reported in that variable may be greater than the sum of INCSURV1 and INCSURV2."
* sentinel 9999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incsurv = . if incsurv == 9999999
* Household sum; missing only when every member's value is missing
by serial, sort: egen incsurv_hh = total(incsurv), missing
label variable incsurv_hh "total household survivor's benefits income, aggregated from individual"

* incdisab: pre-tax income (if any) the respondent received from disability income during the previous calendar year
* individual level
* note: there are variables incdisa1 and incdisa2 which are topcoded; but: "Researchers interested in the total amount of disability income received, rather than the amount received from specific sources, should use INCDISAB. Because INCDISAB reports the value of disability payments from all sources, the amount reported in that variable may be greater than the sum of INCDISA1 and INCDISA2."
* sentinel 9999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incdisab = . if incdisab == 9999999
* Household sum; missing only when every member's value is missing
by serial, sort: egen incdisab_hh = total(incdisab), missing
label variable incdisab_hh "total household disability benefits income, aggregated from individual"

* incdivid: pre-tax income (if any) the respondent received from stocks and mutual funds during the previous calendar year
* individual level
* sentinel 9999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incdivid = . if incdivid == 9999999
* Person-level flag: 1 at or above 999999, 0 below, missing when incdivid is missing
gen incdivid_tc = (incdivid >= 999999) if incdivid != .
label variable incdivid_tc "1 if dividend income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incdivid_hh = total(incdivid), missing
label variable incdivid_hh "total household dividend income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incdivid_hh_tc = max(incdivid_tc)
label variable incdivid_hh_tc "1 if one dividend income observation in HH exceeds TC threshold"

* incrent: pre-tax income (if any) the respondent received from rent (after expenses), from charges to roomers or boarders, and from money paid by estates, trusts, and royalties, during the previous calendar year
* individual level
* sentinel 9999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incrent = . if incrent == 9999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incrent is missing
gen incrent_tc = (incrent >= 99999) if incrent != .
* Labels below describe rental income; the earlier text said "dividend income",
* carried over from the incdivid section
label var incrent_tc "1 if rental income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
bysort serial: egen incrent_hh = total(incrent), missing
label var incrent_hh "total household rental income, aggregated from individual"
* Household flag: count flagged members, then cap the count at 1
bysort serial: egen incrent_hh_tc = total(incrent_tc), missing
replace incrent_hh_tc = 1 if incrent_hh_tc >= 1 & incrent_hh_tc != .
label var incrent_hh_tc "1 if one rental income observation in HH exceeds TC threshold"

* inceduc: pre-tax income (if any) the respondent received from educational assistance during the previous calendar year
* individual level
* sentinel 999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace inceduc = . if inceduc == 999999
* Person-level flag: 1 at or above 99999, 0 below, missing when inceduc is missing
gen inceduc_tc = (inceduc >= 99999) if inceduc != .
label variable inceduc_tc "1 if educational assistance income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen inceduc_hh = total(inceduc), missing
label variable inceduc_hh "total household educational assistance income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen inceduc_hh_tc = max(inceduc_tc)
label variable inceduc_hh_tc "1 if one educational assistance income observation in HH exceeds TC threshold"

* incchild: pre-tax income (if any) the respondent received from child support payments during the previous calendar year
* individual level
* sentinel 999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incchild = . if incchild == 999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incchild is missing
gen incchild_tc = (incchild >= 99999) if incchild != .
label variable incchild_tc "1 if child support income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incchild_hh = total(incchild), missing
label variable incchild_hh "total household child support income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incchild_hh_tc = max(incchild_tc)
label variable incchild_hh_tc "1 if one child support income observation in HH exceeds TC threshold"

* incalim: pre-tax income (if any) the respondent received from alimony payments during the previous calendar year
* individual level
* note: only until 2014
* sentinel 999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incalim = . if incalim == 999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incalim is missing
gen incalim_tc = (incalim >= 99999) if incalim != .
label variable incalim_tc "1 if alimony income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incalim_hh = total(incalim), missing
label variable incalim_hh "total household alimony income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incalim_hh_tc = max(incalim_tc)
label variable incalim_hh_tc "1 if one alimony income observation in HH exceeds TC threshold"

* incasist: pre-tax income (if any) the respondent received during the previous calendar year from regular financial assistance from friends or relatives not living in the same household
* individual level
* sentinel 9999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incasist = . if incasist == 9999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incasist is missing
gen incasist_tc = (incasist >= 99999) if incasist != .
label variable incasist_tc "1 if assistance income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incasist_hh = total(incasist), missing
label variable incasist_hh "total household assistance income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incasist_hh_tc = max(incasist_tc)
label variable incasist_hh_tc "1 if one assistance income observation in HH exceeds TC threshold"

* incother: pre-tax income for the previous calendar year that was not reported in other, more specific, income variables
* individual level
* sentinel 9999999 -> missing (presumably the IPUMS N.I.U. code; confirm in the codebook)
replace incother = . if incother == 9999999
* Person-level flag: 1 at or above 99999, 0 below, missing when incother is missing
gen incother_tc = (incother >= 99999) if incother != .
label variable incother_tc "1 if other income exceeds topcoding threshold"
* Household sum; missing only when every member's value is missing
by serial, sort: egen incother_hh = total(incother), missing
label variable incother_hh "total household other income, aggregated from individual"
* Household flag = largest person-level flag: 1 if any member is flagged,
* 0 if none is, missing if all person-level flags are missing
by serial, sort: egen incother_hh_tc = max(incother_tc)
label variable incother_hh_tc "1 if one other income observation in HH exceeds TC threshold"

* Compare individual components sum to total income
* The row sum is held in a double: egen's default float storage cannot hold
* integers above 16,777,216 exactly, so a large personal total would be rounded
* and the exact-equality assert could fail for reasons unrelated to the data.
* With the missing option the sum is missing only when all components are missing.
egen double inctot_gen = rowtotal(incwage incbus incfarm incss incwelfr incretir incssi incint incunemp incwkcom incvet incsurv incdisab incdivid incrent inceduc incchild incalim incasist incother), missing
assert inctot_gen == inctot
* Scratch variable; only needed for the check above
drop inctot_gen

********************************************************************************
* Summary income variables
********************************************************************************

* universe is everyone age 15+; missing values for children; zeroes for everybody else if income category does not apply

* Individual labor income
* Scratch 2/3 and 1/3 shares of farm and business income; only the
* version_robust == 7 branches read them, and they are dropped once inc_tot exists
foreach src in incfarm incbus {
	gen `src'_23 = (2/3)*`src'
	gen `src'_13 = (1/3)*`src'
}
* The component list depends on version_robust:
*   7     -> wage, 2/3 of business and farm, unemployment, worker's compensation
*   2     -> wage, business, farm, worker's compensation (no unemployment)
*   other -> wage, business, farm, unemployment, worker's compensation
if `version_robust' == 7 {
	egen inc_lab = rowtotal(incwage incbus_23 incfarm_23 incunemp incwkcom), missing
}
else if `version_robust' == 2 {
	egen inc_lab = rowtotal(incwage incbus incfarm incwkcom), missing
}
else {
	egen inc_lab = rowtotal(incwage incbus incfarm incunemp incwkcom), missing
}
label variable inc_lab "Individual labor income"

* Household labor income
* Sum over household members; missing only when every member's inc_lab is missing
by serial, sort: egen inc_lab_hh = total(inc_lab), missing
label variable inc_lab_hh "Household labor income"

* Individual total income
* Always inc_lab plus interest, dividends, rent, educational assistance and other income;
*   7 -> additionally the remaining 1/3 of farm and business income
*   3 -> additionally incss incretir incvet incsurv incdisab incchild incalim incasist
if `version_robust' == 7 {
	egen inc_tot = rowtotal(inc_lab incfarm_13 incbus_13 incint incdivid incrent inceduc incother), missing
}
else if `version_robust' == 3 {
	egen inc_tot = rowtotal(inc_lab incint incdivid incrent inceduc incother incss incretir incvet incsurv incdisab incchild incalim incasist), missing
}
else {
	egen inc_tot = rowtotal(inc_lab incint incdivid incrent inceduc incother), missing
}
label variable inc_tot "Individual total income"
* The scratch shares are no longer needed
drop incfarm_13 incfarm_23 incbus_13 incbus_23

* Household total income
* Sum over household members; missing only when every member's inc_tot is missing
by serial, sort: egen inc_tot_hh = total(inc_tot), missing
label variable inc_tot_hh "Household total income"
